1. Download and Clean refugee employment data
- Data downloaded as individual Excel files from:
- The code below was used to extract the total number of refugees, the
fraction of working age and the percentage employed for the top 10 most
frequent refugee nationalities
- Nationalities were:
- Data does not include Ukrainians with S permits
# Extract Cantonal employment Data for the 10 most common refugee nationalities --------
#Create list of cantons in individual Excel worksheets
canton_list <- c("AG","AI","AR","BE","BL","BS","FR","GE","GL","GR","JU","LU",
"NE","NW","OW","SG","SH","SO","SZ","TG","TI","UR","VD","VS","ZH","ZG")
# Creates list of downloaded data files
file_list <- list.files(path = "Raw Data/", full.names = TRUE)
canton_employment_df <- data.frame()
for (file in file_list) {
#Extract date from filename
date_data <- str_extract(file, "\\d{4}-\\d{2}")
#Select top 10 nationalities for the individual file
sheet_nationality <- "CH-Nati"
nationality_data <- read_excel(file, sheet = sheet_nationality, range="A5:E129")
renamed_data_nationality <- nationality_data %>%
rename(Country=...1,
Total=...2,
Employment_Age=...3,
Employed=...4,
Employed_percent=...5)
#Remove all regional totals keeping only total number of refugees
remove <- c("Staat unbekannt","Ohne Nationalität","Afrika","Nordafrika",
"Subsahara","Amerika","Asien","Europa","Herkunft unbekannt")
nationality_cleaned <- renamed_data_nationality %>%
filter(!str_detect(Country, "Total")) %>%
filter(!Country %in% remove)
numbers_nationality <- nationality_cleaned %>%
select(Country, Total) %>%
arrange(desc(Total)) %>%
head(11)
country_list <- numbers_nationality$Country
for (canton in canton_list) {
canton_data <- read_excel(file, sheet = canton) %>%
filter(`6-23` %in% country_list)
#Keep only first 5 columns containing employment data
canton_summary <- canton_data[1:5] %>%
mutate(Canton = canton)
renamed_canton <- canton_summary %>%
rename(
Country = `6-23`,
Total = ...2,
Employment_Age = ...3,
Employed = ...4,
Employed_percent = ...5
)
#Add date to the extracted employment data
renamed_canton_date <- renamed_canton %>%
mutate(Date=date_data)
#Combine data from all worksheets and workbooks into single dataframe
canton_employment_df <- rbind(canton_employment_df, renamed_canton_date)
}
}
rio::export(canton_employment_df, "Nationality and Employment3.csv")
## Country Total Employment_Age Employed Employed_percent Canton Date
## 1 Gesamttotal 889 584 95 0.16267123287671234 AG 2013-01
## 2 Äthiopien 5 4 0 0 AG 2013-01
## 3 Eritrea 635 408 59 0.14460784313725492 AG 2013-01
## 4 Somalia 11 9 2 0.22222222222222221 AG 2013-01
## 5 Kolumbien 22 9 1 0.1111111111111111 AG 2013-01
2. Plot total number of refugees and employment rate over time
(2013-2023)
- Data downloaded as individual Excel files from:
- The code below was used to extract the total number of refugees, the
fraction of working age and the percentage employed for the top 10 most
frequent refugee nationalities
- Nationalities were:
- Data does not include Ukrainians with S permits
library(plotly)
library(rio)
library(zoo)
employment_data <- rio::import("..\\Nationality and Employment3.csv")
#Clean data and correct types
cleaned_data <- employment_data %>%
mutate(Date=(paste(Date,"-01",sep=""))) %>% #Add day
mutate(Date=as.POSIXct(Date, format = "%Y-%m-%d")) %>% #Convert to datetime
mutate(Employed_percent=as.numeric(Employed_percent)) #Convert percent to numeric not character
#Plot line chart showing employment by nationality in Switzerland over time by nationality
#Create dataframe containing plot data
employment_time_data <- cleaned_data %>%
group_by(Date, Country) %>%
summarise(Employment_average=mean(Employed_percent, na.rm=TRUE)) %>%
ungroup() %>%
mutate(rolling_mean_employment = zoo::rollmean(Employment_average, k = 3, fill = NA))
#Plot time series showing employment from 2013-2023
employment_plot <- employment_time_data %>%
group_by(Country) %>%
plot_ly(x=~Date, y=~rolling_mean_employment) %>%
add_lines(color = ~Country) %>%
layout(title="Employment by Most Common Refugee Nationalities",
yaxis=list(title="Average employment",tickformat = ".1%"))
#Plot line chart showing number of refugees in Switzerland granted B permits over time by nationality
#Create dataframe containing plot data
number_time_data <- cleaned_data %>%
filter(Country!="Gesamttotal") %>%
group_by(Date, Country) %>%
summarise(Employment_average=mean(Employed_percent, na.rm=TRUE),
Total=sum(Total)) %>%
ungroup()
#Plot time series showing employment from 2013-2023
number_plot <- number_time_data %>%
group_by(Country) %>%
plot_ly(x=~Date, y=~Total) %>%
add_lines(color = ~Country) %>%
layout(title="Number of Refugees from most common countries of origin (2013-2023)",
yaxis=list(title="Number of refugees"))
employment_plot
number_plot
3. Plot total number of refugees and employment rate over time by
canton (2013-2023)
- Data downloaded as individual Excel files from:
- The code below was used to extract the total number of refugees, the
fraction of working age and the percentage employed for the top 10 most
frequent refugee nationalities
- Nationalities were:
- Data does not include Ukrainians with S permits
library(sf)
library(tidyverse)
library(plotly)
library(rio)
library(rjson)
library(crosstalk)
# 1. Import Employment and Geo Data ---------------------------------------
#Swiss map data downloaded from:https://cartographyvectors.com/map/1522-switzerland-with-regions
#Import Data
employment_data <- rio::import("..\\Nationality and Employment3.csv")
cantons_abbreviations <- rio::import("..\\Geographic Data/Cantons and Abbreviations.xlsx") #Matches cantons and abbreviations
boundaries_df <- sf::st_read("..\\Geographic Data/Switzerland boundaries.geojson")
## Reading layer `Switzerland boundaries' from data source
## `C:\Users\amand\OneDrive\Bureau\Refugee Employment Data Project\Refugee-Employment-Switzerland\Geographic Data\Switzerland boundaries.geojson'
## using driver `GeoJSON'
## Simple feature collection with 26 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 5.958 ymin: 45.819 xmax: 10.493 ymax: 47.81
## Geodetic CRS: WGS 84
boundaries_json <- rjson::fromJSON(file="..\\Geographic Data/Switzerland boundaries.geojson")
#Clean data and correct types
cleaned_data <- employment_data %>%
mutate(Date=(paste(Date,"-01",sep=""))) %>% #Add day
mutate(Date=as.POSIXct(Date, format = "%Y-%m-%d")) %>% #Convert to datetime
mutate(Employed_percent=as.numeric(Employed_percent))
#Join geographic and employment Data
all_geo_data <- left_join(boundaries_df,cantons_abbreviations,by=c("name"="Canton")) #Join cantons and abbreviations
geo_employment <- left_join(cleaned_data,all_geo_data,by=c("Canton"="Abbreviation")) #Merge geographic and Employment Data
# 2. Plot number of refugees per canton over time -------------------------
#Plot number of refugees per canton over time with employed percentage
employment_all_refugees_df <- geo_employment %>%
select(Date,Country,Employed_percent,name,Total) %>%
filter(Country=="Gesamttotal") %>%
mutate(Employed_percent=ifelse(Employed_percent=="-",NA,Employed_percent)) %>%
mutate(Employed_percent=round(Employed_percent*100,2)) %>%
group_by(year(Date),name) %>%
summarise(Employed_percent=round(mean(Employed_percent,na.rm=TRUE),1),
Total=round(mean(Total,na.rm=TRUE),0))
g <- list(
fitbounds = "locations",
visible = FALSE)
swiss_map <- plot_ly(hoverinfo = "text",
text = ~paste("Canton:", employment_all_refugees_df$name, "<br>",
"Employed:", employment_all_refugees_df$Employed_percent,"%", "<br>",
"Total:", employment_all_refugees_df$Total, "<br>")) %>%
add_trace(
type="choropleth",
geojson=boundaries_json,
locations=employment_all_refugees_df$name,
z=employment_all_refugees_df$Total,
colorscale="Blues",
frame=employment_all_refugees_df$`year(Date)`,
reversescale =T,
zmin=35,
zmax=9590,
featureidkey="properties.name") %>%
layout(geo = g) %>%
colorbar(thickess=20,
#orientation="h",
len=0.5) %>%
layout(title = "Number of Refugees with B Permits (2013-2023)")
# 2. Plot employment of refugees per canton over time -------------------------
g <- list(
fitbounds = "locations",
visible = FALSE)
swiss_map_employment <- plot_ly(hoverinfo = "text",
text = ~paste("Canton:", employment_all_refugees_df$name, "<br>",
"Employed:", employment_all_refugees_df$Employed_percent,"%", "<br>",
"Total:", employment_all_refugees_df$Total, "<br>")) %>%
add_trace(
type="choropleth",
geojson=boundaries_json,
locations=employment_all_refugees_df$name,
z=employment_all_refugees_df$Employed_percent,
colorscale="Blues",
frame=employment_all_refugees_df$`year(Date)`,
reversescale =T,
zmin=20,
zmax=70,
featureidkey="properties.name") %>%
layout(geo = g) %>%
colorbar(thickess=20,
#orientation="h",
len=0.5) %>%
layout(title = "Percentage of employed refugees with B permits (2013-2023)")
swiss_map
swiss_map_employment